from bs4 import BeautifulSoup as Soup
import os
import sys
import csv
# Every relative path used below (the yearly XML inputs and the CSV output)
# resolves against this working directory.
os.chdir(r"D:\JihongLeeLab\EPO_2020")

# Module-level CSV output shared with parser(). newline="" is what the csv
# module asks for so that it can manage line endings itself.
f = open("equivalents.csv", mode="w", newline="", encoding="utf-8")
wf = csv.writer(f)

def parser(year):
    """Extract equivalent-document numbers from one year's EPO XML dump.

    Reads ``EPO{year}.xml`` from the current working directory. The file is
    treated as several XML documents concatenated together, each introduced
    by its own XML declaration. For every document, one row
    ``[appnumber, kind, equiv_number]`` is written to the module-level CSV
    writer ``wf`` per ``ops:inquiry-result`` element found in it, and the
    same triple is echoed to stdout.

    Documents lacking an ``appnum`` or ``kind`` element, and results lacking
    a ``doc-number`` element, are skipped with a warning on stderr instead of
    aborting the run.

    Args:
        year: Value substituted into the input file name ``EPO{year}.xml``.

    Raises:
        OSError: If the input file cannot be opened.
    """
    rdir = "EPO{}.xml".format(year)
    # Context manager closes the input handle as soon as it has been read;
    # this function is called once per year, so leaked handles would pile up.
    with open(rdir, "r", encoding="utf-8") as xml_file:
        xmls = xml_file.read()

    # Split the dump on the XML declaration. Whatever precedes the first
    # declaration is not a document and is dropped by the [1:] slice.
    xml_list = xmls.split('<?xml version="1.0" encoding="utf-8"?>')
    for index, xml in enumerate(xml_list[1:], start=1):
        soup = Soup(xml, "xml")
        appnum_tag = soup.find("appnum")
        kind_tag = soup.find("kind")
        # find() returns None when the element is absent; skip the document
        # rather than crash on `.text`.
        if appnum_tag is None or kind_tag is None:
            print(
                "skipping document {} in {}: missing appnum or kind".format(
                    index, rdir
                ),
                file=sys.stderr,
            )
            continue

        # NOTE(review): .split() yields a list of whitespace-separated
        # tokens, so each CSV cell is written as a list repr such as
        # "['12345']". Kept as-is to preserve the existing output format;
        # confirm whether plain strings (.strip()) were intended.
        appnumber = appnum_tag.text.split()
        kind = kind_tag.text.split()

        for equiv in soup.find_all("ops:inquiry-result"):
            number_tag = equiv.find("doc-number")
            if number_tag is None:
                print(
                    "skipping result without doc-number in document {} of {}".format(
                        index, rdir
                    ),
                    file=sys.stderr,
                )
                continue
            equiv_number = number_tag.text.split()
            wf.writerow([appnumber, kind, equiv_number])
            print("{}-{}-{}".format(appnumber, kind, equiv_number))

# Process the yearly dumps EPO1948.xml through EPO2017.xml. The CSV handle
# `f` is closed in `finally` so that rows already written are flushed to disk
# even when one of the years raises.
try:
    for year in range(1948, 2018):
        parser(year)
finally:
    f.close()